#########################################################################   
#                                 INFO                                  #   
#########################################################################  

  # PROJECT: Gender quotas in Tunisia's 2018 municipal elections
  # PURPOSE: Create summary table for appendix
  # CREATED: December 2020 by Julia Clark
  # INPUTS:  quotas_list_clean.RData, quotas_lecs_clean.RData
  # OUTPUTS: summarystats.tex, lecssummary.tex

#########################################################################   
#               SETUP                                                   #   
######################################################################### 

  ######## ENVIRONMENT
  
  # Start from an empty workspace and work from the data directory.
  # NOTE(review): both calls change the caller's session (all globals are
  # removed; the working directory is changed and never restored). They are
  # kept because the data files are loaded through relative paths.
  rm(list = ls())
  setwd("~/Desktop/replication_what_men_want/data/")

  ######## PACKAGES

  # Install whichever required packages are absent, then attach all of them.
  need <- c("tidyr", "dplyr", "xtable")
  have <- is.element(need, rownames(installed.packages()))
  if (!all(have)) {
    install.packages(need[!have])
  }
  for (pkg in need) {
    library(pkg, character.only = TRUE)
  }
  
  ######## SET ALL TABLES TO SHOW "NA" BY DEFAULT
  
  # Wrapper that masks base::table() so that NA counts are reported whenever
  # missing values are present; callers can still pass `useNA` explicitly.
  table <- function(..., useNA = "ifany") {
    base::table(..., useNA = useNA)
  }
  
#########################################################################   
#               LOAD DATA                                               #   
#########################################################################  

  # List-level and candidate-level data
  # load() returns (invisibly) the names of the objects it restored. Keep them
  # so the script can stop right here, with a clear message, if the objects
  # used further down (`list` and `lecs`) were not in the files. Without this
  # check a missing `list` would silently resolve to base::list() and fail
  # much later with an obscure error.
  list_objs <- load("quotas_list_clean.RData") # expected to provide `list`
  lecs_objs <- load("quotas_lecs_clean.RData") # expected to provide `lecs`

  missing_objs <- setdiff(c("list", "lecs"), c(list_objs, lecs_objs))
  if (length(missing_objs) > 0) {
    stop(
      "Expected object(s) not found in the loaded .RData files: ",
      paste(missing_objs, collapse = ", "),
      call. = FALSE
    )
  }
  
 
#########################################################################   
#               CREATE SUMMARY TABLES: LIST-LEVEL                       #   
#########################################################################  
  
  ##### CONTINUOUS VARIABLES
  
  # All continuous variables
  # Display label -> source column in `list`, in the order the rows should
  # appear in the table. This single vector drives both the renaming and the
  # row ordering, so the two can no longer drift apart.
  list_vars <- c(
    "Female-headed list (FHL)"             = "FHL",
    "Relative strength in 2014"            = "votes_per_nat_mean_2014_parl",
    "Relative strength in 2014 squared"    = "votes_per_nat_mean_2014_parlsq",
    "Above mean relative strength in 2014" = "above_mean_2014_parl",
    "Log population"                       = "log_pop",
    "Female turnout in 2014"               = "turnout_per_pop_f_2014_parl",
    "List vote share in 2018"              = "votes_per",
    "List rank by vote share in 2018"      = "votes_per_rank",
    "List seats won in 2018"               = "seats_lists_2018",
    "List won mayor in 2018"               = "won_mayor",
    "Municipal council seats"              = "seats",
    "Number of lists running in 2018"      = "n_lists_2018",
    "Secular index (DI)"                   = "di_relig_pol_index",
    "Feminist index (DI)"                  = "di_women_pol_index",
    "Female special delegation members"    = "sd_fem_per",
    "Women w/ secondary edu or more"       = "edu_f_high",
    "Men w/ secondary edu or more"         = "edu_m_high",
    "Unemployment rate"                    = "emp_unemploy_rate",
    "Urbanization rate"                    = "pop_com_per"
  )

  listcont <- list %>%

    # Select and rename variables (a named vector in all_of() renames)
    dplyr::select(dplyr::all_of(list_vars)) %>%

    # Non-missing count, mean, st. dev., min, and max for each variable.
    # Output columns are named "<label>_<stat>". The call to list() below
    # resolves to base::list(): R skips the data object named `list` when it
    # looks up a function.
    dplyr::summarise(dplyr::across(
      dplyr::everything(),
      list(
        N      = ~ sum(!is.na(.x)),
        Mean   = ~ mean(.x, na.rm = TRUE),
        St.Dev = ~ sd(.x, na.rm = TRUE),
        Min    = ~ min(.x, na.rm = TRUE),
        Max    = ~ max(.x, na.rm = TRUE)
      )
    )) %>%

    # Reshape to one row per variable and one column per statistic. Going
    # through a single long `value` column keeps every statistic numeric
    # (double), as the table export expects. No label contains "_", so the
    # split on "_" is unambiguous.
    tidyr::pivot_longer(
      dplyr::everything(),
      names_to = c("Variable", "stat"),
      names_sep = "_"
    ) %>%
    tidyr::pivot_wider(names_from = stat, values_from = value) %>%

    # Set order of summary statistics
    dplyr::select(Variable, N, Mean, St.Dev, Min, Max) %>%

    # Set order of variables and sort by it
    mutate(Variable = factor(Variable, levels = names(list_vars))) %>%
    arrange(Variable) %>%

    # Round all numeric variables to one decimal point
    mutate(dplyr::across(-Variable, ~ round(.x, 1)))

  listcont
  
  ##### CATEGORICAL VARIABLES
  
  # Summarize each
  # Number of lists in each category of `list_class_ennahdha`.
  # The column is passed to count() directly rather than through a preceding
  # group_by(): count() on a grouped input returns a result that is still
  # grouped, and dplyr does not allow a grouping column to be modified in
  # mutate(). Counting the column directly yields an ungrouped result.
  listtype <- list %>%
    dplyr::count(list_class_ennahdha) %>%
    dplyr::rename(Variable = list_class_ennahdha, N = n) %>%
    mutate(Variable = as.character(Variable))

  # Pad with the statistic columns of the continuous table so the two can be
  # stacked; typed numeric NAs keep those columns numeric.
  listcate <- listtype %>%
    mutate(
      Mean   = NA_real_,
      St.Dev = NA_real_,
      Min    = NA_real_,
      Max    = NA_real_
    )
  
  ##### JOIN BOTH AND EXPORT
  
  # Join continuous and categorical
  # Join continuous and categorical rows; counts are shown as whole numbers.
  # NOTE(review): the name `sum` masks base::sum() as a variable. Function
  # calls such as sum(x) still work, but a rename would be clearer; it is kept
  # here so any code relying on the object name keeps working.
  sum <- rbind(listcont, listcate) %>%
    mutate(N = round(N, 0))

  # Console preview of the table.
  # `type` is an argument of print(), not of xtable() (where it was silently
  # ignored), so it is dropped here and passed to print() below.
  xtable(sum)

  # Export to latex.
  # digits has one entry per column plus one for the (suppressed) row names.
  # NOTE(review): the hline.after positions are hard-coded row numbers and
  # must be updated if variables are added to or removed from the table.
  print(
    xtable(
      sum,
      caption = "Summary of Variables for List-Level Analysis",
      label = "summarystats",
      digits = c(0, 0, 0, 1, 1, 1, 1)
    ),
    type = "latex",
    file = "~/Desktop/replication_what_men_want/tables/summarystats.tex",
    table.placement = getOption("xtable.table.placement", "H"),
    hline.after = getOption(
      "xtable.hline.after",
      c(-1, 0, 6, 12, 19, 23, nrow(sum))
    ),
    include.rownames = FALSE,
    caption.placement = "top",
    size = "scriptsize"
  )
  

  
#########################################################################   
#             CREATE SUMMARY TABLES: CANDIDATE-LEVEL                    #   
#########################################################################  
  
  ##### CONTINUOUS VARIABLES
  
  # All continuous variables
  # Display label -> source column in `lecs`, in the order the rows should
  # appear in the table. This single vector drives both the renaming and the
  # row ordering, so the two can no longer drift apart.
  lecs_vars <- c(
    "Female Candidate"     = "cand_fem",
    "University Education" = "edu_hig",
    "Political Ambition"   = "ambition_yes",
    "Political Knowledge"  = "know_seats_correct",
    "Skill Index"          = "skill_index",
    "Political Experience" = "exp_pol",
    "Activities Index"     = "act_index",
    "Membership Index"     = "mem_index",
    "RCD Member"           = "rcd",
    "Party Member"         = "party_member",
    "Years in Party"       = "party_years"
  )

  lecscont <- lecs %>%

    # Select and rename variables (a named vector in all_of() renames)
    dplyr::select(dplyr::all_of(lecs_vars)) %>%

    # Non-missing count, mean, st. dev., min, and max for each variable.
    # Output columns are named "<label>_<stat>".
    dplyr::summarise(dplyr::across(
      dplyr::everything(),
      list(
        N      = ~ sum(!is.na(.x)),
        Mean   = ~ mean(.x, na.rm = TRUE),
        St.Dev = ~ sd(.x, na.rm = TRUE),
        Min    = ~ min(.x, na.rm = TRUE),
        Max    = ~ max(.x, na.rm = TRUE)
      )
    )) %>%

    # Reshape to one row per variable and one column per statistic. Going
    # through a single long `value` column keeps every statistic numeric
    # (double), as the table export expects. No label contains "_", so the
    # split on "_" is unambiguous.
    tidyr::pivot_longer(
      dplyr::everything(),
      names_to = c("Variable", "stat"),
      names_sep = "_"
    ) %>%
    tidyr::pivot_wider(names_from = stat, values_from = value) %>%

    # Set order of summary statistics
    dplyr::select(Variable, N, Mean, St.Dev, Min, Max) %>%

    # Set order of variables and sort by it
    mutate(Variable = factor(Variable, levels = names(lecs_vars))) %>%
    arrange(Variable) %>%

    # Round all numeric variables to one decimal point
    mutate(dplyr::across(-Variable, ~ round(.x, 1)))

  lecscont
  
  # Export to latex
  # Console preview of the table.
  # `type` is an argument of print(), not of xtable() (where it was silently
  # ignored), so it is dropped here and passed to print() below.
  xtable(lecscont)

  # Export to latex.
  # digits has one entry per column plus one for the (suppressed) row names;
  # Min and Max are printed without decimals in this table.
  print(
    xtable(
      lecscont,
      caption = "Summary of Variables for Candidate-Level Analysis",
      label = "lecssummary",
      digits = c(0, 0, 0, 1, 1, 0, 0)
    ),
    type = "latex",
    file = "~/Desktop/replication_what_men_want/tables/lecssummary.tex",
    table.placement = getOption("xtable.table.placement", "H"),
    include.rownames = FALSE,
    caption.placement = "top",
    size = "scriptsize"
  )
  